from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression  # 逻辑回归
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
import time

"""
逻辑回归算法  使用随机梯度下降  获取 回归系数；因此需要数据标准化
"""

# Load scikit-learn's bundled breast-cancer dataset.
cancer = load_breast_cancer()
# 569 samples, 30 features
print("keys", cancer.keys())

# print(cancer.DESCR)

# Feature matrix and label vector.
X, y = cancer.data, cancer.target

# print("标签", cancer.target_names)  # 0 = malignant, 1 = benign

# Hold out 20% of the samples for testing; stratify on the labels so both
# splits keep the same class ratio, and fix the seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1,
)

# Optional standardization (currently disabled). Scale-sensitive estimators
# such as LogisticRegression and KNeighborsClassifier benefit from it.
# The scaler is fitted on the training split only, then applied to both
# splits, so no test-set statistics leak into training.
# std = StandardScaler()
# std.fit(X_train)  # learn each feature's mean and standard deviation
# X_train = std.transform(X_train)
# X_test = std.transform(X_test)

# Pick exactly one estimator; the trailing numbers are test-set scores
# noted by the author from earlier runs.
# alg = LogisticRegression()  #  0.9912280701754386
alg = GaussianNB()  # 0.9210526315789473
# alg = KNeighborsClassifier()

# perf_counter() is a monotonic, high-resolution clock intended for timing
# short intervals; time.time() can be coarse and may jump if the system
# clock is adjusted.
start_time = time.perf_counter()
alg.fit(X_train, y_train)

print("测试集得分", alg.score(X_test, y_test))

# Elapsed time covers fitting plus scoring (and printing the score).
print("总时间", time.perf_counter() - start_time)

"""
高斯朴素贝叶斯：【最快的】
测试集得分 0.9385964912280702
总时间 0.0049860477447509766

逻辑回归：【效果好，时间慢】
测试集得分 0.9912280701754386
总时间 0.022986650466918945


KNN
测试集得分 0.9736842105263158
总时间 0.015993356704711914
"""

